There are 231 subjects with patients’ age and the amount of exercise expressed as estimated hours per week with 2 groups (control and patient).
data("Blackmore")
skimr::skim(Blackmore)
| Name | Blackmore |
| Number of rows | 945 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| subject | 0 | 1 | FALSE | 231 | 100: 5, 101: 5, 105: 5, 106: 5 |
| group | 0 | 1 | FALSE | 2 | pat: 586, con: 359 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 11.44 | 2.77 | 8 | 10.0 | 12.00 | 14.00 | 17.92 | ▇▇▇▆▃ |
| exercise | 0 | 1 | 2.53 | 3.50 | 0 | 0.4 | 1.33 | 3.04 | 29.96 | ▇▁▁▁▁ |
Blackmore$log.exercise <- log(Blackmore$exercise + 5/60, 2)
summary(Blackmore$log.exercise)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.5850 -1.0489 0.4991 0.2271 1.6431 4.9090
There are 93 unqiue subjects in the control group. And there are 359 observations in the control group.
control <- Blackmore[Blackmore$group == "control",]
nrow(control)
## [1] 359
skimr::skim(control)
| Name | control |
| Number of rows | 359 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| subject | 0 | 1 | FALSE | 93 | 200: 5, 205: 5, 207: 5, 212: 5 |
| group | 0 | 1 | FALSE | 1 | con: 359, pat: 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 11.26 | 2.70 | 8.00 | 8.00 | 10.00 | 13.50 | 17.92 | ▇▇▇▅▂ |
| exercise | 0 | 1 | 1.64 | 1.81 | 0.00 | 0.43 | 1.05 | 2.15 | 11.54 | ▇▂▁▁▁ |
| log.exercise | 0 | 1 | -0.05 | 1.76 | -3.58 | -0.96 | 0.18 | 1.16 | 3.54 | ▃▃▇▆▂ |
length(unique(control$subject))
## [1] 93
control.id<- unique(control$subject)
ggplot(control, aes(age, log.exercise)) +
geom_point() +
facet_grid(~ subject)
ggplot(Blackmore, aes(age, log.exercise, color = group)) +
geom_point() +
facet_wrap_paginate(~ subject, nrow = 4, ncol = 4, page = 1)
attach(Blackmore)
con <- sample(unique(subject[group == "control"]), 20)
con.20 <- Blackmore[is.element(subject, con),]
ggplot(con.20, aes(I(age-8), log.exercise)) + geom_point() + facet_wrap(.~subject)
pat <- sample(unique(subject[group == "control"]), 20)
pat.20 <- Blackmore[is.element(subject, pat),]
ggplot(pat.20, aes(I(age-8), log.exercise)) +
geom_point() + facet_wrap(.~subject) +
geom_smooth(method = "lm", se = FALSE)
detach(Blackmore)
summary(I(Blackmore$age - 8))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 4.000 3.442 6.000 9.920
fit <- lmer(log.exercise ~ age*group + (age|subject), data = Blackmore)
summary(fit)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.exercise ~ age * group + (age | subject)
## Data: Blackmore
##
## REML criterion at convergence: 3614.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7349 -0.4245 0.1228 0.5280 2.6362
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 4.89076 2.2115
## age 0.02716 0.1648 -0.78
## Residual 1.54778 1.2441
## Number of obs: 945, groups: subject, 231
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.78820 0.37546 -2.099
## age 0.06402 0.03136 2.042
## grouppatient -2.27286 0.47681 -4.767
## age:grouppatient 0.23986 0.03941 6.087
##
## Correlation of Fixed Effects:
## (Intr) age grpptn
## age -0.906
## grouppatint -0.787 0.713
## age:grpptnt 0.721 -0.796 -0.903
vcov is the covariance matrix of the fixed-effect estimates which is consistent with the mathmatical expression \(V(\hat \beta) = (X^\top \Omega^{-1} X)^{-1}\) (solve(t(X) %*% Sigma_inv %*% X)).
Variance-covariance matrix of random effects \(\mathbf G\), for example \(2\times 2\) symmetric matrix:
\[\begin{bmatrix} $g_1^2$ & $g_{12}^2$ \\ $g_{21}^2$ & $g__2^2$ \end{bmatrix}\]Using lower.tri and upper.tri to apply the lower and upper triangular part of the matrix. \(G_1\) represents the variability of the intercept across subjects and \(G_2\) refers the variablity of the slope of subjects.
N <- nrow(Blackmore)
subjects <- unique(Blackmore$subject)
y <- Blackmore$log.exercise
X <- model.matrix(~ age*group, data = Blackmore)
beta <- fixef(fit)
Z <- model.matrix(~ subject + subject:age-1, data = Blackmore)
u <- c(ranef(fit)$subject[,1], ranef(fit)$subject[,2])
q <- length(u)
vc <- as.data.frame(VarCorr(fit))
R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
G1 <- vc$vcov[vc$grp == 'subject'][1] * diag(q/2)
G2 <- vc$vcov[vc$grp == 'subject'][2] * diag(q/2)
G <- Matrix::bdiag(G1, G2)
G[lower.tri(G)] <- vc$vcov[vc$grp == 'subject'][3]
G[upper.tri(G)] <- vc$vcov[vc$grp == 'subject'][3]
Sigma <- Z %*% G %*% t(Z) + R
Sigma_inv <- solve(Sigma)
P <- Sigma_inv - Sigma_inv %*% X %*% vcov(fit) %*% t(X) %*% Sigma_inv
varMargRes <- Sigma - X %*% vcov(fit) %*% t(X)
varCondRes <- R %*% P %*% R
varU <- G - G %*% t(Z) %*% P %*% Z %*% G
fit_bl <- Blackmore %>%
mutate(fitted = as.vector(X %*% beta),
resm = y - fitted,
resc = y - fitted - Z %*% u,
index = 1:n()) %>%
rowwise() %>%
mutate(resm_std = resm/sqrt(diag(varMargRes)[index]),
resc_std = resc/sqrt(diag(varCondRes)[index])) %>%
ungroup()
unit_bl <- expand_grid(subjects) %>%
mutate(Vi = map_dbl(subjects, ~{
ind <- Blackmore %>%
mutate(index = 1:n()) %>%
filter(subject == .x) %>%
pull(index)
mi <- length(ind)
Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% fit_bl$resm[ind]
sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
}),
Mi = as.vector(t(u) %*% solve(varU) %*% u),
index = 1:n())
ggplot(fit_bl, aes(fitted, resm_std)) +
geom_hline(yintercept = 0) +
geom_point()
ggplot(fit_bl, aes(index, resm_std)) + geom_point() + geom_hline(yintercept = 0)
ggplot(unit_bl, aes(index, Vi)) + geom_point()+ xlab("Unit indicies") + ylab("Modified Lesaffre-Verbeke index")
ggplot(fit_bl, aes(index, resc_std)) + geom_point() + geom_hline(yintercept = 0)
ggplot(fit_bl, aes(fitted, resc_std)) + geom_point() + geom_hline(yintercept = 0)
ggplot(fit_bl, aes(sample = resc_std)) +
geom_qq() + geom_qq_line(color = "red")
ggplot(unit_bl, aes(index, Mi)) + geom_point() + xlab("Unit index") + ylab("Manhalanobis distance")
ggplot(unit_bl, aes(sample = Mi)) +
geom_qq(distribution = stats::qchisq,
dparams = list(df = q)) +
geom_qq_line(color = "red",
distribution = stats::qchisq,
dparams = list(df = q))
lineup(null_permute("resm_std"), fit_bl) %>%
ggplot(aes(fitted, resm_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resm_std"), fit_bl) %>%
ggplot(aes(index, resm_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("Vi"), unit_bl) %>%
ggplot(aes(index, Vi)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resc_std"), fit_bl) %>%
ggplot(aes(index, resc_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resc_std"), fit_bl) %>%
ggplot(aes(fitted, resc_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resc_std"), fit_bl) %>%
ggplot(aes(sample = resc_std)) +
geom_qq() +
geom_qq_line(color = "red") +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
Since the \(M_i\) are the same, there is no need to do that
lineup(null_permute("Mi"), unit_bl) %>%
ggplot(aes(index, Mi))+
geom_point()+
facet_wrap(~.sample)
lineup(null_permute("Mi"), unit_bl) %>%
ggplot(aes(sample = Mi)) +
geom_qq(distribution = stats::qchisq,
dparams = list(df = q)) +
geom_qq_line(color = "red",
distribution = stats::qchisq,
dparams = list(df = q)) +
facet_wrap(~.sample)
d <- lineup(null_permute("log.exercise"), Blackmore)
simdat <- map_dfr(1:20, function(i){
df <- d %>% filter(.sample == i)
m <- fit <- lmer(log.exercise ~ I(age-8)*group + (1 |subject), data = df)
N <- nrow(df)
subjects <- unique(df$subject)
y <- df$log.exercise
X <- model.matrix(~ I(age-8)*group, data = df)
beta <- fixef(m)
Z <- model.matrix(~ subject-1, data = df)
u <- ranef(m)$subject[,1]
q <- length(u)
vc <- as.data.frame(VarCorr(m))
R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
G <- vc$vcov[vc$grp == 'subject'] * diag(q)
Sigma <- Z %*% G %*% t(Z) + R
Sigma_inv <- solve(Sigma)
P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv
varMargRes <- Sigma - X %*% vcov(m) %*% t(X)
varCondRes <- R %*% P %*% R
varU <- G - G %*% t(Z) %*% P %*% Z %*% G
fitted = as.vector(X %*% beta)
resm = y - fitted
resc = y - fitted - Z %*% u
index = 1:nrow(df)
resm_std = resm / sqrt(diag(varMargRes)[index])
resc_std = resc / sqrt(diag(varCondRes)[index])
tibble::tibble(df, fitted, resm_std, resc_std, index)
})